import pandas as pd
import numpy as np
import statsmodels.api as sm
import scipy.stats as st
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.mlab as mlab
# Load the launch data set. A raw string keeps the Windows path's backslash
# literal instead of relying on "\S" being an unrecognised escape sequence
# (which Python reports as an invalid-escape warning).
df = pd.read_csv(r'E:\Space_Corrected.csv')
df
| Unnamed: 0.1 | Unnamed: 0 | Company Name | Location | Datum | Detail | Status Rocket | Rocket | Status Mission | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 0 | SpaceX | LC-39A, Kennedy Space Center, Florida, USA | Fri Aug 07, 2020 05:12 UTC | Falcon 9 Block 5 | Starlink V1 L9 & BlackSky | StatusActive | 50.0 | Success |
| 1 | 1 | 1 | CASC | Site 9401 (SLS-2), Jiuquan Satellite Launch Ce... | Thu Aug 06, 2020 04:01 UTC | Long March 2D | Gaofen-9 04 & Q-SAT | StatusActive | 29.75 | Success |
| 2 | 2 | 2 | SpaceX | Pad A, Boca Chica, Texas, USA | Tue Aug 04, 2020 23:57 UTC | Starship Prototype | 150 Meter Hop | StatusActive | NaN | Success |
| 3 | 3 | 3 | Roscosmos | Site 200/39, Baikonur Cosmodrome, Kazakhstan | Thu Jul 30, 2020 21:25 UTC | Proton-M/Briz-M | Ekspress-80 & Ekspress-103 | StatusActive | 65.0 | Success |
| 4 | 4 | 4 | ULA | SLC-41, Cape Canaveral AFS, Florida, USA | Thu Jul 30, 2020 11:50 UTC | Atlas V 541 | Perseverance | StatusActive | 145.0 | Success |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 4319 | 4319 | 4319 | US Navy | LC-18A, Cape Canaveral AFS, Florida, USA | Wed Feb 05, 1958 07:33 UTC | Vanguard | Vanguard TV3BU | StatusRetired | NaN | Failure |
| 4320 | 4320 | 4320 | AMBA | LC-26A, Cape Canaveral AFS, Florida, USA | Sat Feb 01, 1958 03:48 UTC | Juno I | Explorer 1 | StatusRetired | NaN | Success |
| 4321 | 4321 | 4321 | US Navy | LC-18A, Cape Canaveral AFS, Florida, USA | Fri Dec 06, 1957 16:44 UTC | Vanguard | Vanguard TV3 | StatusRetired | NaN | Failure |
| 4322 | 4322 | 4322 | RVSN USSR | Site 1/5, Baikonur Cosmodrome, Kazakhstan | Sun Nov 03, 1957 02:30 UTC | Sputnik 8K71PS | Sputnik-2 | StatusRetired | NaN | Success |
| 4323 | 4323 | 4323 | RVSN USSR | Site 1/5, Baikonur Cosmodrome, Kazakhstan | Fri Oct 04, 1957 19:28 UTC | Sputnik 8K71PS | Sputnik-1 | StatusRetired | NaN | Success |
4324 rows × 9 columns
# Discard the two "Unnamed" columns that came in with the CSV.
df = df.drop(columns=["Unnamed: 0", "Unnamed: 0.1"])
df.head()
| Company Name | Location | Datum | Detail | Status Rocket | Rocket | Status Mission | |
|---|---|---|---|---|---|---|---|
| 0 | SpaceX | LC-39A, Kennedy Space Center, Florida, USA | Fri Aug 07, 2020 05:12 UTC | Falcon 9 Block 5 | Starlink V1 L9 & BlackSky | StatusActive | 50.0 | Success |
| 1 | CASC | Site 9401 (SLS-2), Jiuquan Satellite Launch Ce... | Thu Aug 06, 2020 04:01 UTC | Long March 2D | Gaofen-9 04 & Q-SAT | StatusActive | 29.75 | Success |
| 2 | SpaceX | Pad A, Boca Chica, Texas, USA | Tue Aug 04, 2020 23:57 UTC | Starship Prototype | 150 Meter Hop | StatusActive | NaN | Success |
| 3 | Roscosmos | Site 200/39, Baikonur Cosmodrome, Kazakhstan | Thu Jul 30, 2020 21:25 UTC | Proton-M/Briz-M | Ekspress-80 & Ekspress-103 | StatusActive | 65.0 | Success |
| 4 | ULA | SLC-41, Cape Canaveral AFS, Florida, USA | Thu Jul 30, 2020 11:50 UTC | Atlas V 541 | Perseverance | StatusActive | 145.0 | Success |
# Summary statistics (count / unique / top / freq for object columns) of df.
df.describe()
| Company Name | Location | Datum | Detail | Status Rocket | Rocket | Status Mission | |
|---|---|---|---|---|---|---|---|
| count | 4324 | 4324 | 4324 | 4324 | 4324 | 964 | 4324 |
| unique | 56 | 137 | 4319 | 4278 | 2 | 56 | 4 |
| top | RVSN USSR | Site 31/6, Baikonur Cosmodrome, Kazakhstan | Wed Nov 05, 2008 00:15 UTC | Cosmos-3MRB (65MRB) | BOR-5 Shuttle | StatusRetired | 450.0 | Success |
| freq | 1777 | 235 | 2 | 6 | 3534 | 136 | 3879 |
# Print column dtypes, non-null counts and memory usage of df.
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 4324 entries, 0 to 4323 Data columns (total 7 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Company Name 4324 non-null object 1 Location 4324 non-null object 2 Datum 4324 non-null object 3 Detail 4324 non-null object 4 Status Rocket 4324 non-null object 5 Rocket 964 non-null object 6 Status Mission 4324 non-null object dtypes: object(7) memory usage: 236.6+ KB
# Row counts per "Company Name" (most frequent first), limited to the first 28.
ds = df["Company Name"].value_counts().reset_index().head(28)
ds
| index | Company Name | |
|---|---|---|
| 0 | RVSN USSR | 1777 |
| 1 | Arianespace | 279 |
| 2 | CASC | 251 |
| 3 | General Dynamics | 251 |
| 4 | NASA | 203 |
| 5 | VKS RF | 201 |
| 6 | US Air Force | 161 |
| 7 | ULA | 140 |
| 8 | Boeing | 136 |
| 9 | Martin Marietta | 114 |
| 10 | SpaceX | 100 |
| 11 | MHI | 84 |
| 12 | Northrop | 83 |
| 13 | Lockheed | 79 |
| 14 | ISRO | 76 |
| 15 | Roscosmos | 55 |
| 16 | ILS | 46 |
| 17 | Sea Launch | 36 |
| 18 | ISAS | 30 |
| 19 | Kosmotras | 22 |
| 20 | US Navy | 17 |
| 21 | ISA | 13 |
| 22 | Rocket Lab | 13 |
| 23 | Eurockot | 13 |
| 24 | ESA | 13 |
| 25 | Blue Origin | 12 |
| 26 | IAI | 11 |
| 27 | ExPace | 10 |
import plotly.graph_objects as go

# Bar chart of the per-company launch counts held in `ds`.
#
# value_counts().reset_index() names its two columns differently across
# pandas versions ("index"/<name> before 2.0, <name>/"count" from 2.0 on),
# so the label and count columns are picked by position rather than by name.
fig = go.Figure(
    go.Bar(
        x=ds.iloc[:, 0],  # company labels
        y=ds.iloc[:, 1],  # launch counts
        # Colour every bar according to its own count.
        marker=dict(color=ds.iloc[:, 1], colorscale="bluered"),
    )
)
fig.update_layout(
    title="Number of Launches by Every Company",
    # The x axis lists companies, so label it as such.
    xaxis_title="Top 28 Companies",
    yaxis_title="count",
    hovermode="x",
)
fig.show()
# Row counts for each distinct "Status Rocket" value, as a two-column frame.
ds = df.loc[:, "Status Rocket"].value_counts().reset_index()
ds
| index | Status Rocket | |
|---|---|---|
| 0 | StatusRetired | 3534 |
| 1 | StatusActive | 790 |
import plotly.express as px

# Pie chart of the rocket-status counts held in `ds`.
#
# The first column of `ds` carries the status labels and the second the
# counts. Their names depend on the pandas version ("index"/"Status Rocket"
# before 2.0, "Status Rocket"/"count" from 2.0 on), so look them up by
# position instead of hard-coding them.
fig = px.pie(
    ds,
    values=ds.columns[1],
    names=ds.columns[0],
    title="Rocket Status",
)
fig.show()
# Row counts for each distinct "Status Mission" value, as a two-column frame.
ds = df.loc[:, "Status Mission"].value_counts().reset_index()
ds
| index | Status Mission | |
|---|---|---|
| 0 | Success | 3879 |
| 1 | Failure | 339 |
| 2 | Partial Failure | 102 |
| 3 | Prelaunch Failure | 4 |
# Bar chart of the mission-status counts held in `ds`.
#
# Column names produced by value_counts().reset_index() differ between pandas
# versions ("index"/"Status Mission" before 2.0, "Status Mission"/"count"
# from 2.0 on); selecting them by position works with either.
fig = px.bar(
    ds,
    x=ds.columns[0],  # status labels
    y=ds.columns[1],  # counts
    title="Mission Status",
)
fig.show()
# Number of missing entries in the " Rocket" column.
df[" Rocket"].isna().sum()
3360
# Keep only the rows that have a value in " Rocket", then report how many remain.
df_ = df.dropna(axis=0, subset=[" Rocket"])
df_.shape[0]
964
# Inspect the remaining " Rocket" values.
df_[" Rocket"]
0 50.0
1 29.75
3 65.0
4 145.0
5 64.68
...
3855 59.0
3971 63.23
3993 63.23
4000 63.23
4020 63.23
Name: Rocket, Length: 964, dtype: object
# Convert " Rocket" from text to float.
#
# The column has dtype object (strings); commas are stripped before the cast
# so that comma-formatted numbers parse. Anything that ends up missing after
# the conversion is replaced by 0.0. A fillna() *before* the string replace is
# unnecessary: non-string values come out of .str.replace() as NaN anyway and
# are caught by the final fillna().
#
# df_ was derived from df, so writing into it through .loc raises pandas'
# SettingWithCopyWarning. assign() builds a fresh frame with the converted
# column instead, which also guarantees the column really gets a float dtype.
df_ = df_.assign(
    **{
        " Rocket": (
            df_[" Rocket"]
            .str.replace(",", "", regex=False)
            .astype(np.float64)
            .fillna(0.0)
        )
    }
)
C:\Users\prava\AppData\Local\Temp\ipykernel_17240\3502872187.py:1: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy C:\Users\prava\AppData\Local\Temp\ipykernel_17240\3502872187.py:2: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
# Histogram of " Rocket" values below 1000, coloured by "Status Rocket".
df_d = df_.loc[df_[" Rocket"] < 1000]
plt.figure(figsize=(22, 6))
sns.histplot(df_d, x=" Rocket", hue="Status Rocket")
plt.show()
# Number of missing entries in the "Status Mission" column.
df["Status Mission"].isna().sum()
0
# Same histogram of " Rocket" values, this time coloured by "Status Mission".
plt.figure(figsize=(22, 6))
sns.histplot(df_d, x=" Rocket", hue="Status Mission")
plt.show()
# Total money spent by each company
df_.head(n=5)
| Company Name | Location | Datum | Detail | Status Rocket | Rocket | Status Mission | |
|---|---|---|---|---|---|---|---|
| 0 | SpaceX | LC-39A, Kennedy Space Center, Florida, USA | Fri Aug 07, 2020 05:12 UTC | Falcon 9 Block 5 | Starlink V1 L9 & BlackSky | StatusActive | 50.00 | Success |
| 1 | CASC | Site 9401 (SLS-2), Jiuquan Satellite Launch Ce... | Thu Aug 06, 2020 04:01 UTC | Long March 2D | Gaofen-9 04 & Q-SAT | StatusActive | 29.75 | Success |
| 3 | Roscosmos | Site 200/39, Baikonur Cosmodrome, Kazakhstan | Thu Jul 30, 2020 21:25 UTC | Proton-M/Briz-M | Ekspress-80 & Ekspress-103 | StatusActive | 65.00 | Success |
| 4 | ULA | SLC-41, Cape Canaveral AFS, Florida, USA | Thu Jul 30, 2020 11:50 UTC | Atlas V 541 | Perseverance | StatusActive | 145.00 | Success |
| 5 | CASC | LC-9, Taiyuan Satellite Launch Center, China | Sat Jul 25, 2020 03:13 UTC | Long March 4B | Ziyuan-3 03, Apocalypse-10 & N... | StatusActive | 64.68 | Success |
# Sum " Rocket" per company, then keep only companies whose total is positive.
df_money = df_.groupby("Company Name")[" Rocket"].sum().reset_index()
df_money = df_money.loc[df_money[" Rocket"] > 0]
df_money.head()
| Company Name | Rocket | |
|---|---|---|
| 0 | Arianespace | 16345.00 |
| 1 | Boeing | 1241.00 |
| 2 | CASC | 6340.26 |
| 3 | EER | 20.00 |
| 4 | ESA | 37.00 |
# Order companies from the largest " Rocket" total to the smallest.
df_money_ = df_money.sort_values(" Rocket", ascending=False)
df_money_.head()
| Company Name | Rocket | |
|---|---|---|
| 14 | NASA | 76280.00 |
| 0 | Arianespace | 16345.00 |
| 21 | ULA | 14798.00 |
| 16 | RVSN USSR | 10000.00 |
| 2 | CASC | 6340.26 |
# Bar chart of the per-company " Rocket" totals, largest first.
fig = px.bar(
    data_frame=df_money_,
    x="Company Name",
    y=" Rocket",
    title="Total Spent Money for each Company",
)
fig.show()
# Number of missions by year
#
# Parse the "Datum" strings into timestamps. utc=True makes the UTC zone
# explicit, so the result is always one tz-aware datetime64 column.
# NOTE(review): presumably some "Datum" entries lack the time / "UTC" suffix;
# without utc=True such a mix can produce an object-dtype column — confirm
# against the data.
df["date"] = pd.to_datetime(df["Datum"], utc=True)
df.head()
| Company Name | Location | Datum | Detail | Status Rocket | Rocket | Status Mission | date | |
|---|---|---|---|---|---|---|---|---|
| 0 | SpaceX | LC-39A, Kennedy Space Center, Florida, USA | Fri Aug 07, 2020 05:12 UTC | Falcon 9 Block 5 | Starlink V1 L9 & BlackSky | StatusActive | 50.0 | Success | 2020-08-07 05:12:00+00:00 |
| 1 | CASC | Site 9401 (SLS-2), Jiuquan Satellite Launch Ce... | Thu Aug 06, 2020 04:01 UTC | Long March 2D | Gaofen-9 04 & Q-SAT | StatusActive | 29.75 | Success | 2020-08-06 04:01:00+00:00 |
| 2 | SpaceX | Pad A, Boca Chica, Texas, USA | Tue Aug 04, 2020 23:57 UTC | Starship Prototype | 150 Meter Hop | StatusActive | NaN | Success | 2020-08-04 23:57:00+00:00 |
| 3 | Roscosmos | Site 200/39, Baikonur Cosmodrome, Kazakhstan | Thu Jul 30, 2020 21:25 UTC | Proton-M/Briz-M | Ekspress-80 & Ekspress-103 | StatusActive | 65.0 | Success | 2020-07-30 21:25:00+00:00 |
| 4 | ULA | SLC-41, Cape Canaveral AFS, Florida, USA | Thu Jul 30, 2020 11:50 UTC | Atlas V 541 | Perseverance | StatusActive | 145.0 | Success | 2020-07-30 11:50:00+00:00 |
# Pull the calendar year out of every parsed timestamp.
df["year"] = df["date"].map(lambda ts: ts.year)
df.head()
| Company Name | Location | Datum | Detail | Status Rocket | Rocket | Status Mission | date | year | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | SpaceX | LC-39A, Kennedy Space Center, Florida, USA | Fri Aug 07, 2020 05:12 UTC | Falcon 9 Block 5 | Starlink V1 L9 & BlackSky | StatusActive | 50.0 | Success | 2020-08-07 05:12:00+00:00 | 2020 |
| 1 | CASC | Site 9401 (SLS-2), Jiuquan Satellite Launch Ce... | Thu Aug 06, 2020 04:01 UTC | Long March 2D | Gaofen-9 04 & Q-SAT | StatusActive | 29.75 | Success | 2020-08-06 04:01:00+00:00 | 2020 |
| 2 | SpaceX | Pad A, Boca Chica, Texas, USA | Tue Aug 04, 2020 23:57 UTC | Starship Prototype | 150 Meter Hop | StatusActive | NaN | Success | 2020-08-04 23:57:00+00:00 | 2020 |
| 3 | Roscosmos | Site 200/39, Baikonur Cosmodrome, Kazakhstan | Thu Jul 30, 2020 21:25 UTC | Proton-M/Briz-M | Ekspress-80 & Ekspress-103 | StatusActive | 65.0 | Success | 2020-07-30 21:25:00+00:00 | 2020 |
| 4 | ULA | SLC-41, Cape Canaveral AFS, Florida, USA | Thu Jul 30, 2020 11:50 UTC | Atlas V 541 | Perseverance | StatusActive | 145.0 | Success | 2020-07-30 11:50:00+00:00 | 2020 |
# Row counts per year, as a two-column frame.
ds = df.loc[:, "year"].value_counts().reset_index()
ds
| index | year | |
|---|---|---|
| 0 | 1971 | 119 |
| 1 | 2018 | 117 |
| 2 | 1977 | 114 |
| 3 | 1975 | 113 |
| 4 | 1976 | 113 |
| ... | ... | ... |
| 59 | 2010 | 37 |
| 60 | 2005 | 37 |
| 61 | 1958 | 28 |
| 62 | 1959 | 20 |
| 63 | 1957 | 3 |
64 rows × 2 columns
# Bar chart of the number of missions per year held in `ds`.
#
# The first column of `ds` holds the years and the second the counts. Before
# pandas 2.0 they are named "index"/"year", from 2.0 on "year"/"count", so
# hard-coding x="index", y="year" fails (or plots the wrong column) on newer
# versions. Selecting by position works with both.
fig = px.bar(
    ds,
    x=ds.columns[0],  # years
    y=ds.columns[1],  # mission counts
    title="Missions Number by Year",
)
fig.show()
# Countries and mission status
from sklearn.preprocessing import LabelEncoder

# Learn an integer code for every distinct "Status Mission" string;
# fit() returns the encoder itself, so it can be bound in one step.
encoder = LabelEncoder().fit(df["Status Mission"])
encoder
LabelEncoder()
# Bar colour for each encoded mission-status label (codes 0..3, in order).
colors = dict(enumerate(("red", "Orange", "Yellow", "Green")))
colors
{0: 'red', 1: 'Orange', 2: 'Yellow', 3: 'Green'}
# Replacement table applied to the last comma-separated part of "Location":
# each key is swapped for the country name given as its value.
# NOTE(review): "Gran Canaria" is mapped to "USA" — presumably the operator's
# country rather than the geographic one; confirm this is intended.
countries_dict = {
    "Russia": "Russian Federation",
    "New Mexico": "USA",
    "Yellow Sea": "China",
    "Shahrud Missile Test Site": "Iran",
    "Pacific Missile Range Facility": "USA",
    "Barents Sea": "Russian Federation",
    "Gran Canaria": "USA",
}
# Country = the text after the last ", " in "Location", with the special
# cases listed in countries_dict swapped for proper country names.
df["country"] = (
    df["Location"]
    .str.rsplit(", ", n=1)
    .str[-1]
    .replace(countries_dict)
)
df.head()
| Company Name | Location | Datum | Detail | Status Rocket | Rocket | Status Mission | date | year | country | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | SpaceX | LC-39A, Kennedy Space Center, Florida, USA | Fri Aug 07, 2020 05:12 UTC | Falcon 9 Block 5 | Starlink V1 L9 & BlackSky | StatusActive | 50.0 | Success | 2020-08-07 05:12:00+00:00 | 2020 | USA |
| 1 | CASC | Site 9401 (SLS-2), Jiuquan Satellite Launch Ce... | Thu Aug 06, 2020 04:01 UTC | Long March 2D | Gaofen-9 04 & Q-SAT | StatusActive | 29.75 | Success | 2020-08-06 04:01:00+00:00 | 2020 | China |
| 2 | SpaceX | Pad A, Boca Chica, Texas, USA | Tue Aug 04, 2020 23:57 UTC | Starship Prototype | 150 Meter Hop | StatusActive | NaN | Success | 2020-08-04 23:57:00+00:00 | 2020 | USA |
| 3 | Roscosmos | Site 200/39, Baikonur Cosmodrome, Kazakhstan | Thu Jul 30, 2020 21:25 UTC | Proton-M/Briz-M | Ekspress-80 & Ekspress-103 | StatusActive | 65.0 | Success | 2020-07-30 21:25:00+00:00 | 2020 | Kazakhstan |
| 4 | ULA | SLC-41, Cape Canaveral AFS, Florida, USA | Thu Jul 30, 2020 11:50 UTC | Atlas V 541 | Perseverance | StatusActive | 145.0 | Success | 2020-07-30 11:50:00+00:00 | 2020 | USA |
from plotly.subplots import make_subplots

# One small bar chart per country showing the share (in percent) of each
# mission status among that country's launches.

# Compute the country list once and reuse it for titles and the loop.
countries = df["country"].unique().tolist()

# Four charts per row. Keep at least the original 4x4 grid, but add rows when
# there are more than 16 countries so add_trace never targets a missing cell.
n_cols = 4
n_rows = max(4, -(-len(countries) // n_cols))  # ceiling division

fig = make_subplots(rows=n_rows, cols=n_cols, subplot_titles=countries)
for i, country in enumerate(countries):
    # Percentage of every mission status within this country.
    counts = df[df["country"] == country]["Status Mission"].value_counts(normalize=True) * 100
    # Map each status label to its encoder code, then to the matching colour.
    color = [colors[x] for x in encoder.transform(counts.index)]
    trace = go.Bar(
        x=counts.index,
        y=counts.values,
        name=country,
        marker={"color": color},
        showlegend=False,
    )
    # Fill the grid row by row.
    fig.add_trace(trace, row=(i // n_cols) + 1, col=(i % n_cols) + 1)

# 250 px per row gives the original height of 1000 for the 4-row grid.
fig.update_layout(
    title={"text": "Countries and Mission Status"},
    height=250 * n_rows,
    width=1100,
)
# Only the left-most chart of every row gets a y-axis title.
for i in range(1, n_rows + 1):
    fig.update_yaxes(title_text="Percentage", row=i, col=1)
fig.show()